Skip to content

Method: {...}

1: /*
2: * *********************************************************************************************************************
3: *
4: * blueMarine II: Semantic Media Centre
5: * http://tidalwave.it/projects/bluemarine2
6: *
7: * Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
8: *
9: * *********************************************************************************************************************
10: *
11: * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
12: * the License. You may obtain a copy of the License at
13: *
14: * http://www.apache.org/licenses/LICENSE-2.0
15: *
16: * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
17: * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
18: * specific language governing permissions and limitations under the License.
19: *
20: * *********************************************************************************************************************
21: *
22: * git clone https://bitbucket.org/tidalwave/bluemarine2-src
23: * git clone https://github.com/tidalwave-it/bluemarine2-src
24: *
25: * *********************************************************************************************************************
26: */
27: package it.tidalwave.bluemarine2.downloader.impl;
28:
29: import javax.annotation.Nonnull;
30: import javax.annotation.PostConstruct;
31: import javax.inject.Inject;
32: import java.util.Date;
33: import java.util.List;
34: import java.io.ByteArrayOutputStream;
35: import java.io.IOException;
36: import java.net.URISyntaxException;
37: import java.net.URL;
38: import org.apache.http.HttpRequest;
39: import org.apache.http.HttpResponse;
40: import org.apache.http.HttpResponseInterceptor;
41: import org.apache.http.ProtocolException;
42: import org.apache.http.client.RedirectStrategy;
43: import org.apache.http.client.cache.CacheResponseStatus;
44: import org.apache.http.client.cache.HttpCacheContext;
45: import org.apache.http.client.cache.HttpCacheEntry;
46: import org.apache.http.client.cache.Resource;
47: import org.apache.http.client.methods.CloseableHttpResponse;
48: import org.apache.http.client.methods.HttpGet;
49: import org.apache.http.client.methods.HttpUriRequest;
50: import org.apache.http.impl.client.CloseableHttpClient;
51: import org.apache.http.impl.client.cache.CacheConfig;
52: import org.apache.http.impl.client.cache.CachingHttpClients;
53: import org.apache.http.impl.client.cache.HeapResource;
54: import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
55: import org.apache.http.message.BasicHeader;
56: import org.apache.http.protocol.HttpContext;
57: import it.tidalwave.util.NotFoundException;
58: import it.tidalwave.util.annotation.VisibleForTesting;
59: import it.tidalwave.messagebus.MessageBus;
60: import it.tidalwave.messagebus.annotation.ListensTo;
61: import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
62: import it.tidalwave.bluemarine2.message.PowerOnNotification;
63: import it.tidalwave.bluemarine2.downloader.DownloadComplete;
64: import it.tidalwave.bluemarine2.downloader.DownloadComplete.Origin;
65: import it.tidalwave.bluemarine2.downloader.DownloadRequest;
66: import lombok.Cleanup;
67: import lombok.extern.slf4j.Slf4j;
68: import static it.tidalwave.bluemarine2.downloader.DownloaderPropertyNames.CACHE_FOLDER_PATH;
69:
70: /***********************************************************************************************************************
71: *
72: * @author Fabrizio Giudici
73: *
74: **********************************************************************************************************************/
75: @SimpleMessageSubscriber @Slf4j
76: public class DefaultDownloader
77: {
78: @Inject
79: private MessageBus messageBus;
80:
81: @Inject
82: private SimpleHttpCacheStorage cacheStorage;
83:
84: private PoolingHttpClientConnectionManager connectionManager;
85:
86: private CacheConfig cacheConfig;
87:
88: private CloseableHttpClient httpClient;
89:
90: private final HttpResponseInterceptor killCacheHeaders = (HttpResponse
91: response, HttpContext context) ->
92: {
93: response.removeHeaders("Expires");
94: response.removeHeaders("Pragma");
95: response.removeHeaders("Cache-Control");
96: response.addHeader("Expires", "Mon, 31 Dec 2099 00:00:00 GMT");
97: };
98:
99: /*******************************************************************************************************************
100: *
101: *
102: *
103: ******************************************************************************************************************/
104: // FIXME: this is because there's a fix, and we explicitly save stuff in the cache - see below
105: private final RedirectStrategy dontFollowRedirect = new RedirectStrategy()
106: {
107: @Override
108: public boolean isRedirected (HttpRequest request, HttpResponse response, HttpContext context)
109: throws ProtocolException
110: {
111: return false;
112: }
113:
114: @Override
115: public HttpUriRequest getRedirect (HttpRequest request, HttpResponse response, HttpContext context)
116: throws ProtocolException
117: {
118: return null;
119: }
120: };
121:
122: /*******************************************************************************************************************
123: *
124: *
125: *
126: ******************************************************************************************************************/
127: @PostConstruct
128: @VisibleForTesting void initialize()
129: {
130: connectionManager = new PoolingHttpClientConnectionManager();
131: connectionManager.setMaxTotal(200);
132: connectionManager.setDefaultMaxPerRoute(20);
133:
134: cacheConfig = CacheConfig.custom()
135: .setAllow303Caching(true)
136: .setMaxCacheEntries(Integer.MAX_VALUE)
137: .setMaxObjectSize(Integer.MAX_VALUE)
138: .setSharedCache(false)
139: .setHeuristicCachingEnabled(true)
140: .build();
141: httpClient = CachingHttpClients.custom()
142: .setHttpCacheStorage(cacheStorage)
143: .setCacheConfig(cacheConfig)
144: .setRedirectStrategy(dontFollowRedirect)
145: .setUserAgent("blueMarine (fabrizio.giudici@tidalwave.it)")
146: .setDefaultHeaders(List.of(new BasicHeader("Accept", "application/n3")))
147: .setConnectionManager(connectionManager)
148: .addInterceptorFirst(killCacheHeaders) // FIXME: only if explicitly configured
149: .build();
150: }
151:
152: /*******************************************************************************************************************
153: *
154: *
155: *
156: ******************************************************************************************************************/
157: @VisibleForTesting void onPowerOnNotification (@ListensTo @Nonnull final PowerOnNotification notification)
158: throws NotFoundException
159: {
160: log.info("onPowerOnNotification({})", notification);
161: cacheStorage.setFolderPath(notification.getProperties().get(CACHE_FOLDER_PATH));
162: }
163:
164: /*******************************************************************************************************************
165: *
166: *
167: *
168: ******************************************************************************************************************/
169: @VisibleForTesting void onDownloadRequest (@ListensTo @Nonnull final DownloadRequest request)
170: throws URISyntaxException
171: {
172: try
173: {
174: log.info("onDownloadRequest({})", request);
175:
176: URL url = request.getUrl();
177:
178: for (;;)
179: {
180: final HttpCacheContext context = HttpCacheContext.create();
181: @Cleanup final CloseableHttpResponse response = httpClient.execute(new HttpGet(url.toURI()), context);
182: final byte[] bytes = bytesFrom(response);
183: final CacheResponseStatus cacheResponseStatus = context.getCacheResponseStatus();
184: log.debug(">>>> cacheResponseStatus: {}", cacheResponseStatus);
185:
186: final Origin origin = cacheResponseStatus.equals(CacheResponseStatus.CACHE_HIT) ? Origin.CACHE
187: : Origin.NETWORK;
188:
189: // FIXME: shouldn't do this by myself
190: // FIXME: upon configuration, everything should be cached (needed for supporting integration tests)
191: if (!origin.equals(Origin.CACHE) && List.of(200, 303).contains(response.getStatusLine().getStatusCode()))
192: {
193: final Date date = new Date();
194: final Resource resource = new HeapResource(bytes);
195: cacheStorage.putEntry(url.toExternalForm(),
196: new HttpCacheEntry(date, date, response.getStatusLine(), response.getAllHeaders(), resource));
197: }
198:
199: // FIXME: if the redirect were enabled, we could drop this check
200: if (request.isOptionPresent(DownloadRequest.Option.FOLLOW_REDIRECT)
201: && response.getStatusLine().getStatusCode() == 303) // SEE_OTHER FIXME
202: {
203: url = new URL(response.getFirstHeader("Location").getValue());
204: log.info(">>>> following 'see also' to {} ...", url);
205: }
206: else
207: {
208: messageBus.publish(new DownloadComplete(request.getUrl(),
209: response.getStatusLine().getStatusCode(),
210: bytes,
211: origin));
212: return;
213: }
214: }
215: }
216: catch (IOException e)
217: {
218: log.error("{}: {}", request.getUrl(), e.toString());
219: messageBus.publish(new DownloadComplete(request.getUrl(), -1, new byte[0], Origin.NETWORK));
220: }
221: }
222:
223: /*******************************************************************************************************************
224: *
225: *
226: *
227: ******************************************************************************************************************/
228: @Nonnull
229: private byte[] bytesFrom (@Nonnull final HttpResponse response)
230: throws IOException
231: {
232: final ByteArrayOutputStream baos = new ByteArrayOutputStream();
233:
234: if (response.getEntity() != null)
235: {
236: response.getEntity().writeTo(baos);
237: }
238:
239: return baos.toByteArray();
240: }
241: }